Modern cameras provide great detail and resolution, but lack key information: depth. A camera based on the pinhole model, can not capture depth using one image alone. Stereo vision is the process of extracting 3-D information from multiple 2-D views of a scene. In order to extract depth, for example, one approach would be using the setup shown below where two images are taken such that they differ only in the x dimension. These different persepctives can allow us to use the disparity in their images and the parameters of the camera to calculate depth.

from PIL import Image
import matplotlib.pyplot as plt
stereovision = Image.open('../images/StereoVision.png')
plt.figure(figsize=(10,7))
plt.imshow(stereovision)
plt.show()
Images were taken of my living room each aligned with the edge of a table and offset by a baseline of 25cm.
import os
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
if on_colab:
image_dir = 'Depth-from-Disparity/images/living_room/'
else:
image_dir = '../images/living_room/'
images_available = sorted(os.listdir(image_dir), key= lambda image_path: int(image_path.split("_")[-2]))
images = [Image.open(os.path.join(image_dir, image_path)).rotate(180) for image_path in images_available]
images = [image.resize((800,600)) for image in images]
# All 5 images
num_images = len(images)
fig, ax = plt.subplots(nrows=1, ncols=num_images, sharey=True, figsize=(30, 20))
for axis, image, image_path in zip(ax, images, images_available):
axis.imshow(image)
axis.set_title("Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
# The first and last image with a baseline of 100cm (1m)
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(20, 7))
for axis, image, image_path in zip(ax, [images[0]] + [images[-1]], [images_available[0]] + [images_available[-1]]):
axis.imshow(image)
axis.set_title("Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
# The second and fourth image with a baseline of 50cm
chosen_images = [1, 3]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(20, 7))
for axis, image, image_path in zip(ax, [images[chosen_images[0]]] + [images[chosen_images[1]]],
[images_available[chosen_images[0]]] + [images_available[chosen_images[1]]]):
axis.imshow(image)
axis.set_title("Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
chosen_grayscale_images = [get_intensity_image(images[i])/255 for i in chosen_images]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(20, 7))
for axis, image, image_path in zip(ax, chosen_grayscale_images,
[images_available[chosen_images[0]]] + [images_available[chosen_images[1]]]):
axis.imshow(image, cmap='gray')
axis.set_title("Normalized Grayscale Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
# Checking to see if the correspondence is working
i = 200
j = 150
kernel_Y_min = i - margin
kernel_Y_max = i + margin + 1
kernel_X_min = j - margin
kernel_X_max = j + margin + 1
print(kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max)
min_Y_window = max(i - search_range, 0)
max_Y_window = min(i + search_range + 1, chosen_grayscale_images[0].shape[0])
min_X_window = j - margin
max_X_window = min(j + (2*search_range) + 1, chosen_grayscale_images[0].shape[1])
print(min_Y_window, max_Y_window, min_X_window, max_X_window)
192 209 142 159 175 226 142 201
# Area from right image to be matched in left image
plt.imshow(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max], cmap='gray')
plt.title('Area to be matched from right image')
plt.show()
# Area in left image to be searched for right image portion
fig,ax = plt.subplots(ncols=2, nrows=1, figsize=(16, 7))
ax[0].imshow(chosen_grayscale_images[0][min_Y_window:max_Y_window, min_X_window:max_X_window], cmap='gray')
ax[0].set_title('Area to be searched in left image')
ax[1].imshow(chosen_grayscale_images[1][min_Y_window:max_Y_window, min_X_window:max_X_window], cmap='gray')
ax[1].set_title('Same area in the right image (to show disparity)')
plt.show()
# Output of correspondence (second coordinate is discrepency)
output = correspondence_2D(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max],
chosen_grayscale_images[0][min_Y_window:max_Y_window, min_X_window:max_X_window])
max_coords = np.unravel_index(np.argmin(output, axis=None), output.shape)
print(max_coords)
(17, 8)
# Check the returned area against the original
fig,ax = plt.subplots(ncols=2, nrows=1, figsize=(16, 7))
ax[0].imshow(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max], cmap='gray')
ax[0].set_title('Original region in right image')
ax[1].imshow(chosen_grayscale_images[0][min_Y_window+max_coords[0]:min_Y_window+max_coords[0]+(kernel_Y_max-kernel_Y_min),
min_X_window+max_coords[1]:min_X_window+max_coords[1]+(kernel_X_max-kernel_X_min)], cmap='gray')
ax[1].set_title('Corresponding region in left image')
plt.show()
Images were taken of City College from across the street each aligned with the edge of a window and offset by a baseline of 11 inches (the length of a peice of paper).
import os
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
if on_colab:
image_dir = 'Depth-from-Disparity/images/city_college/'
else:
image_dir = '../images/city_college/'
images_available = sorted(os.listdir(image_dir), key= lambda image_path: int(image_path.split("_")[-2]))
images = [Image.open(os.path.join(image_dir, image_path)).rotate(180) for image_path in images_available]
images = [image.resize((800,600), resample=Image.LANCZOS) for image in images]
# All 5 images
num_images = len(images)
fig, ax = plt.subplots(nrows=1, ncols=num_images, sharey=True, figsize=(30, 20))
for axis, image, image_path in zip(ax, images, images_available):
axis.imshow(image)
axis.set_title("Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
# The first and last image with a baseline of 44in
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(20, 7))
for axis, image, image_path in zip(ax, [images[0]] + [images[-1]], [images_available[0]] + [images_available[-1]]):
axis.imshow(image)
axis.set_title("Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
import numpy as np
def correspondence_2D(array_a, array_b, center_y = 0, center_x = 0):
# convolves array_a across array_b but subtracts rather than multiplies
# assumes dimensions of array_b greater than or equal to corresponding of array_a
results = np.zeros(array_b.shape[:2])
convs = 0
for y in range(len(array_b)):
for x in range(len(array_b[0])):
if convs % 100 == 0:
pass
convs += 1
for i in range(len(array_a)):
for j in range(len(array_a[0])):
try:
try:
a = array_a[i,j]
except:
a = 0
try:
idx_y = y+i-center_y
idx_x = x+j-center_x
if idx_y < 0 or idx_x < 0:
b = 0
else:
b = array_b[idx_y,idx_x]
except:
b = 0
results[y,x] += np.absolute(a-b)
#print(y,x,i,j,y+i-center_y,x+j-center_x,a,b)
except:
pass
return results
def correspondence_3D(array_a, array_b):
# convolves array_a across array_b but subtracts rather than multiplies
# assumes dimensions of array_b greater than or equal to corresponding of array_a
results = np.zeros(array_b.shape[:2])
convs = 0
for y in range(len(array_b)):
for x in range(len(array_b[0])):
if convs % 100 == 0:
pass
convs += 1
for i in range(len(array_a)):
for j in range(len(array_a[0])):
for k in range(3):
try:
try:
a = array_a[i,j,k]
except:
a = 0
try:
b = array_b[y+i,x+j,k]
except:
b = 0
results[y,x] += np.absolute(a-b)
except:
pass
return results
def NTSC_Luminance_Equation(pixel):
luminance_coefficients = [0.299, 0.587, 0.114]
return np.dot(pixel, luminance_coefficients)
def get_intensity_image(image):
img_array = np.array(image)
return np.array([[NTSC_Luminance_Equation(img_array[i, j, :])
for j in range(img_array.shape[1])]
for i in range(img_array.shape[0])])
# The second and third image with a baseline of 11in seem to be good candidates
chosen_images = [1, 2]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(20, 7))
for axis, image, image_path in zip(ax, [images[chosen_images[0]]] + [images[chosen_images[1]]],
[images_available[chosen_images[0]]] + [images_available[chosen_images[1]]]):
axis.imshow(image)
axis.set_title("Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
chosen_grayscale_images = [get_intensity_image(images[i])/255 for i in chosen_images]
fig, ax = plt.subplots(nrows=1, ncols=2, sharey=True, figsize=(20, 7))
for axis, image, image_path in zip(ax, chosen_grayscale_images,
[images_available[chosen_images[0]]] + [images_available[chosen_images[1]]]):
axis.imshow(image, cmap='gray')
axis.set_title("Normalized Grayscale Baseline {} cm".format(image_path.split("_")[-2]))
plt.show()
kernel_size = 17 #assumed odd since that is usual
margin = kernel_size//2
search_range = 25
log_file = open('log.txt','a+')
# Checking to see if the correspondence is working
i = 400
j = 450
kernel_Y_min = i - margin
kernel_Y_max = i + margin + 1
kernel_X_min = j - margin
kernel_X_max = j + margin + 1
print(kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max)
min_Y_window = max(i - search_range, 0)
max_Y_window = min(i + search_range + 1, chosen_grayscale_images[0].shape[0])
min_X_window = j - margin
max_X_window = min(j + (2*search_range) + 1, chosen_grayscale_images[0].shape[1])
print(min_Y_window, max_Y_window, min_X_window, max_X_window)
392 409 442 459 375 426 442 501
# Area from right image to be matched in left image
plt.imshow(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max], cmap='gray')
plt.title('Area to be matched from right image')
plt.show()
# Area in left image to be searched for right image portion
fig,ax = plt.subplots(ncols=2, nrows=1, figsize=(16, 7))
ax[0].imshow(chosen_grayscale_images[0][min_Y_window:max_Y_window, min_X_window:max_X_window], cmap='gray')
ax[0].set_title('Area to be searched in left image')
ax[1].imshow(chosen_grayscale_images[1][min_Y_window:max_Y_window, min_X_window:max_X_window], cmap='gray')
ax[1].set_title('Same area in the right image (to show disparity)')
plt.show()
# Output of correspondence (second coordinate is discrepency)
output = correspondence_2D(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max],
chosen_grayscale_images[0][min_Y_window:max_Y_window, min_X_window:max_X_window])
max_coords = np.unravel_index(np.argmin(output, axis=None), output.shape)
print(max_coords)
(19, 29)
# Check the returned area against the original
fig,ax = plt.subplots(ncols=2, nrows=1, figsize=(16, 7))
ax[0].imshow(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max], cmap='gray')
ax[0].set_title('Original region in right image')
ax[1].imshow(chosen_grayscale_images[0][min_Y_window+max_coords[0]:min_Y_window+max_coords[0]+(kernel_Y_max-kernel_Y_min),
min_X_window+max_coords[1]:min_X_window+max_coords[1]+(kernel_X_max-kernel_X_min)], cmap='gray')
ax[1].set_title('Corresponding region in left image')
plt.show()
def get_kernel_limits(i,j,margin):
kernel_Y_min = i - margin
kernel_Y_max = i + margin + 1
kernel_X_min = j - margin
kernel_X_max = j + margin + 1
#print(kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max)
return kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max
def get_window_limits(i,j,margin,search_range,chosen_grayscale_images):
min_Y_window = max(i - search_range, 0)
max_Y_window = min(i + search_range + 1, chosen_grayscale_images[0].shape[0])
min_X_window = j - margin
max_X_window = min(j + (2*search_range) + 1, chosen_grayscale_images[0].shape[1])
#print(min_Y_window, max_Y_window, min_X_window, max_X_window)
return min_Y_window, max_Y_window, min_X_window, max_X_window
import csv
i = margin
j = margin
step = 5
log_written = []
log = []
current_row = 0
current_col = 0
while i < chosen_grayscale_images[1].shape[0]-margin:
while j < chosen_grayscale_images[1].shape[1]-margin:
if i != current_row or (current_col-j) >= 200:
print(i,j)
current_row = i
current_col = j
kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max = get_kernel_limits(i, j, margin)
min_Y_window, max_Y_window, min_X_window, max_X_window = get_window_limits(i, j, margin, search_range,
chosen_grayscale_images)
output = correspondence_2D(chosen_grayscale_images[1][kernel_Y_min:kernel_Y_max, kernel_X_min:kernel_X_max],
chosen_grayscale_images[0][min_Y_window:max_Y_window, min_X_window:max_X_window])
max_coords = np.unravel_index(np.argmin(output, axis=None), output.shape)
#print(max_coords)
log.append((i,j,
kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max,
min_Y_window, max_Y_window, min_X_window, max_X_window,
max_coords[0], max_coords[1]))
#print()
j += step
if len(log):
with open('log.csv','a',newline='') as file:
writer = csv.writer(file)
for entry in log:
writer.writerow(entry)
log_written += log
log = []
file.close()
i += step
j = margin
parallel = Image.open('../images/parallel_programming.png')
plt.figure(figsize=(30,20))
plt.imshow(parallel)
plt.show()
import csv
import numpy as np
log_file = open('test/log.csv','r')
reader = csv.reader(log_file)
discrepancies = np.array([[np.NaN]*chosen_grayscale_images[0].shape[1]]*chosen_grayscale_images[0].shape[0])
depths = np.array([[np.NaN]*chosen_grayscale_images[0].shape[1]]*chosen_grayscale_images[0].shape[0])
points = []
values = []
for row in reader:
i = int(row[0])
j = int(row[1])
if (i-8)%5==0 and (j-8)%5==0:
points.append([i,j])
kernel_Y_min,kernel_Y_max,kernel_X_min,kernel_X_max = get_kernel_limits(i, j, margin)
discrepancy = int(row[-1])
values.append(discrepancy)
try:
depth = (4.25) * (11 * 2.54 * 10) / (discrepancy*0.00349242)
except:
depth = np.inf
for y in range(kernel_Y_min, kernel_Y_max):
for x in range(kernel_X_min, kernel_X_max):
discrepancies[y,x] = discrepancy
depths[y,x] = depth
fig, ax = plt.subplots(ncols=2, figsize=(20,7))
ax[0].imshow(images[chosen_images[1]])
ax[0].set_title('Original Right Image')
cax = ax[1].imshow(discrepancies, cmap = 'rainbow')
ax[1].set_title('Disparity accros the image')
cbar = fig.colorbar(cax, ticks=np.arange(0,60,5))
fig, ax = plt.subplots(ncols=2, figsize=(20,7))
ax[0].imshow(images[chosen_images[1]])
ax[0].set_title('Original Right Image')
cax = ax[1].imshow(depths, cmap = 'rainbow')
ax[1].set_title('Depth accros the image')
cbar = fig.colorbar(cax, ticks=np.arange(0,100,10))
perlow, perhigh = np.percentile(np.nan_to_num(depths),[10,90])
depths = np.clip(depths, perlow, perhigh)
fig, ax = plt.subplots(ncols=2, figsize=(20,7))
ax[0].imshow(images[chosen_images[1]])
ax[0].set_title('Original Right Image')
cax = ax[1].imshow(depths, cmap = 'rainbow')
ax[1].set_title('Depth accros the image')
cbar = fig.colorbar(cax, ticks=np.arange(0,100))
from scipy.interpolate import griddata
points = np.array(points)
values = np.array(values)
grid_x, grid_y = np.mgrid[0:600, 0:800]
grid_z0 = griddata(points, values, (grid_x, grid_y), method='nearest')
fig, ax = plt.subplots(ncols=2, figsize=(20,7))
ax[0].imshow(images[chosen_images[1]])
ax[0].set_title('Original Right Image')
cax = ax[1].imshow(grid_z0, cmap = 'rainbow')
ax[1].set_title('Interpolation of disparity accros the image')
cbar = fig.colorbar(cax, ticks=np.arange(-10,59,5))
val_per_low, val_per_high = np.percentile(values, [10,90])
depths_2 = []
for val in np.clip(values,val_per_low, val_per_high):
depths_2.append((4.25 * 11 * 2.54 * 10) / (val * 0.00349242))
depths_2 = np.array(depths_2)
grid_x, grid_y = np.mgrid[0:600, 0:800]
grid_z0 = griddata(points, depths_2, (grid_x, grid_y), method='nearest')
fig, ax = plt.subplots(ncols=2, figsize=(20,7))
ax[0].imshow(images[chosen_images[1]])
ax[0].set_title('Original Right Image')
cax = ax[1].imshow(grid_z0, cmap = 'rainbow')
ax[1].set_title('Interpolation of disparity accros the image')
cbar = fig.colorbar(cax, ticks=np.arange(np.min(depths_2), np.max(depths_2), (np.max(depths_2)-np.min(depths_2))/10))
depths_3 = []
for val in values:
depths_3.append((4.25 * 11 * 2.54 * 10) / (val * 0.00349242))
depth_arr = np.array(depths_3)
per_low, per_high = np.percentile(np.nan_to_num(depth_arr),[10,90])
depth_arr = np.clip(depth_arr, per_low, per_high)
grid_x, grid_y = np.mgrid[0:600, 0:800]
grid_z0 = griddata(points, depth_arr, (grid_x, grid_y), method='nearest')
/Users/pthaper/miniconda3/lib/python3.6/site-packages/ipykernel_launcher.py:3: RuntimeWarning: divide by zero encountered in double_scalars This is separate from the ipykernel package so we can avoid doing imports until
fig, ax = plt.subplots(ncols=2, figsize=(20,7))
ax[0].imshow(images[chosen_images[1]])
ax[0].set_title('Original Right Image')
cax = ax[1].imshow(grid_z0, cmap = 'rainbow')
ax[1].set_title('Interpolation of disparity accros the image')
cbar = fig.colorbar(cax, ticks=np.arange(np.min(depth_arr), np.max(depth_arr), (np.max(depth_arr)-np.min(depth_arr))/10))